Merging datasets¶

In [1]:
import pandas as pd

# Load CSV files
rainfall_df = pd.read_csv('Monthly Rainfall.csv')
water_deficit_df = pd.read_csv('Water deficit.csv')
temperature_max_df = pd.read_csv('Temperature maximum.csv')
temperature_min_df = pd.read_csv('Temperature minimum.csv')
landuse_df = pd.read_csv('Landuse.csv')
fertilizer_consumption_df = pd.read_csv('Fertilizer Consumption.csv')

# Common columns to merge on
common_columns = ['Dist Code', 'Year', 'State Code', 'State Name', 'Dist Name']

# Merge all DataFrames on the common columns
merged_df = rainfall_df.merge(water_deficit_df, on=common_columns, how='outer')\
                       .merge(temperature_max_df, on=common_columns, how='outer')\
                       .merge(temperature_min_df, on=common_columns, how='outer')\
                       .merge(landuse_df, on=common_columns, how='outer')\
                       .merge(fertilizer_consumption_df, on=common_columns, how='outer')
C:\Users\DELL\anaconda3\lib\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (
In [2]:
merged_df
Out[2]:
Dist Code Year State Code State Name Dist Name JANUARY RAINFALL (Millimeters) FEBRUARY RAINFALL (Millimeters) MARCH RAINFALL (Millimeters) APRIL RAINFALL (Millimeters) MAY RAINFALL (Millimeters) ... PHOSPHATE SHARE IN NPK (Percent) PHOSPHATE PER HA OF NCA (Kg per ha) PHOSPHATE PER HA OF GCA (Kg per ha) POTASH CONSUMPTION (tons) POTASH SHARE IN NPK (Percent) POTASH PER HA OF NCA (Kg per ha) POTASH PER HA OF GCA (Kg per ha) TOTAL CONSUMPTION (tons) TOTAL PER HA OF NCA (Kg per ha) TOTAL PER HA OF GCA (Kg per ha)
0 65 2000 5 Karnataka Kolar 0.0 36.0 0.0 17.0 70.0 ... 22.4 37.93 34.68 8386.0 13.6 23.10 21.12 61509.0 169.45 154.93
1 65 2001 5 Karnataka Kolar 1.0 0.0 5.0 75.0 24.0 ... 23.6 34.15 32.18 7148.0 14.1 20.39 19.21 50822.0 144.97 136.61
2 65 2002 5 Karnataka Kolar 0.0 0.0 10.0 26.0 113.0 ... 24.0 41.46 40.49 8389.0 18.3 31.70 30.96 45774.0 172.97 168.95
3 65 2003 5 Karnataka Kolar -1.0 -1.0 -1.0 10.0 3.0 ... 24.6 31.70 31.04 9263.0 20.3 26.10 25.55 45669.0 128.70 125.99
4 65 2004 5 Karnataka Kolar 2.0 1.0 14.0 33.0 172.0 ... 26.4 38.86 37.56 11659.0 21.9 32.27 31.18 53195.0 147.22 142.28
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
535 2025 2013 5 Karnataka Yadagiri 0.0 5.9 0.0 10.6 44.0 ... 30.4 55.26 44.83 7025.0 11.4 20.69 16.79 61759.0 181.92 147.59
536 2025 2014 5 Karnataka Yadagiri NaN NaN NaN NaN NaN ... 33.4 85.98 68.93 10081.0 13.1 33.65 26.98 77071.0 257.30 206.28
537 2025 2015 5 Karnataka Yadagiri NaN NaN NaN NaN NaN ... 33.2 67.63 54.71 6131.0 9.1 18.64 15.08 67113.0 204.02 165.04
538 2025 2016 5 Karnataka Yadagiri NaN NaN NaN NaN NaN ... 34.9 95.16 79.18 9154.0 10.0 27.31 22.72 91389.0 272.62 226.83
539 2025 2017 5 Karnataka Yadagiri NaN NaN NaN NaN NaN ... 34.0 98.85 79.76 9253.0 10.2 29.52 23.82 91067.0 290.56 234.44

540 rows × 79 columns

Check missing values¶

In [3]:
merged_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 79 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   Dist Code                                       540 non-null    int64  
 1   Year                                            540 non-null    int64  
 2   State Code                                      540 non-null    int64  
 3   State Name                                      540 non-null    object 
 4   Dist Name                                       540 non-null    object 
 5   JANUARY RAINFALL (Millimeters)                  516 non-null    float64
 6   FEBRUARY RAINFALL (Millimeters)                 516 non-null    float64
 7   MARCH RAINFALL (Millimeters)                    516 non-null    float64
 8   APRIL RAINFALL (Millimeters)                    516 non-null    float64
 9   MAY RAINFALL (Millimeters)                      516 non-null    float64
 10  JUNE RAINFALL (Millimeters)                     516 non-null    float64
 11  JULY RAINFALL (Millimeters)                     516 non-null    float64
 12  AUGUST RAINFALL (Millimeters)                   516 non-null    float64
 13  SEPTEMBER RAINFALL (Millimeters)                516 non-null    float64
 14  OCTOBER RAINFALL (Millimeters)                  516 non-null    float64
 15  NOVEMBER RAINFALL (Millimeters)                 516 non-null    float64
 16  DECEMBER RAINFALL (Millimeters)                 516 non-null    float64
 17  ANNUAL RAINFALL (Millimeters)                   516 non-null    float64
 18  JANUARY WATER DEFICIT (Millimeters)             464 non-null    float64
 19  FEBRUARY WATER DEFICIT (Millimeters)            464 non-null    float64
 20  MARCH WATER DEFICIT (Millimeters)               464 non-null    float64
 21  APRIL WATER DEFICIT (Millimeters)               464 non-null    float64
 22  MAY WATER DEFICIT (Millimeters)                 464 non-null    float64
 23  JUNE WATER DEFICIT (Millimeters)                464 non-null    float64
 24  JULY WATER DEFICIT (Millimeters)                464 non-null    float64
 25  AUGUST WATER DEFICIT (Millimeters)              464 non-null    float64
 26  OCTOBER WATER DEFICIT (Millimeters)             464 non-null    float64
 27  NOVEMBER WATER DEFICIT (Millimeters)            464 non-null    float64
 28  DECEMBER WATER DEFICIT (Millimeters)            464 non-null    float64
 29  JANUARY MAXIMUM (Centigrate)                    464 non-null    float64
 30  FEBRUARY MAXIMUM (Centigrate)                   464 non-null    float64
 31  MARCH MAXIMUM (Centigrate)                      464 non-null    float64
 32  APRIL MAXIMUM (Centigrate)                      464 non-null    float64
 33  MAY MAXIMUM (Centigrate)                        464 non-null    float64
 34  JUNE MAXIMUM (Centigrate)                       464 non-null    float64
 35  JULY MAXIMUM (Centigrate)                       464 non-null    float64
 36  AUGUST MAXIMUM (Centigrate)                     464 non-null    float64
 37  SEPTEMBER MAXIMUM (Centigrate)                  464 non-null    float64
 38  OCTOBER MAXIMUM (Centigrate)                    464 non-null    float64
 39  NOVEMBER MAXIMUM (Centigrate)                   464 non-null    float64
 40  DECEMBER MAXIMUM (Centigrate)                   464 non-null    float64
 41  JANUARY MINIMUM (Centigrate)                    464 non-null    float64
 42  FEBRUARY MINIMUM (Centigrate)                   464 non-null    float64
 43  MARCH MINIMUM (Centigrate)                      464 non-null    float64
 44  APRIL MINIMUM (Centigrate)                      464 non-null    float64
 45  MAY MINIMUM (Centigrate)                        464 non-null    float64
 46  JUNE MINIMUM (Centigrate)                       464 non-null    float64
 47  JULY MINIMUM (Centigrate)                       464 non-null    float64
 48  AUGUST MINIMUM (Centigrate)                     464 non-null    float64
 49  SEPTEMBER MINIMUM (Centigrate)                  464 non-null    float64
 50  OCTOBER MINIMUM (Centigrate)                    464 non-null    float64
 51  NOVEMBER MINIMUM (Centigrate)                   464 non-null    float64
 52  DECEMBER MINIMUM (Centigrate)                   464 non-null    float64
 53  TOTAL AREA (1000 ha)                            516 non-null    float64
 54  FOREST AREA (1000 ha)                           516 non-null    float64
 55  BARREN AND UNCULTIVABLE LAND AREA (1000 ha)     516 non-null    float64
 56  LAND PUT TO NONAGRICULTURAL USE AREA (1000 ha)  516 non-null    float64
 57  CULTIVABLE WASTE LAND AREA (1000 ha)            516 non-null    float64
 58  PERMANENT PASTURES AREA (1000 ha)               516 non-null    float64
 59  OTHER FALLOW AREA (1000 ha)                     516 non-null    float64
 60  CURRENT FALLOW AREA (1000 ha)                   516 non-null    float64
 61  NET CROPPED AREA (1000 ha)                      516 non-null    float64
 62  GROSS CROPPED AREA (1000 ha)                    516 non-null    float64
 63  CROPING INTENSITY (Percent)                     516 non-null    float64
 64  NITROGEN CONSUMPTION (tons)                     516 non-null    float64
 65  NITROGEN SHARE IN NPK (Percent)                 516 non-null    float64
 66  NITROGEN PER HA OF NCA (Kg per ha)              516 non-null    float64
 67  NITROGEN PER HA OF GCA (Kg per ha)              516 non-null    float64
 68  PHOSPHATE CONSUMPTION (tons)                    516 non-null    float64
 69  PHOSPHATE SHARE IN NPK (Percent)                516 non-null    float64
 70  PHOSPHATE PER HA OF NCA (Kg per ha)             516 non-null    float64
 71  PHOSPHATE PER HA OF GCA (Kg per ha)             516 non-null    float64
 72  POTASH CONSUMPTION (tons)                       516 non-null    float64
 73  POTASH SHARE IN NPK (Percent)                   516 non-null    float64
 74  POTASH PER HA OF NCA (Kg per ha)                516 non-null    float64
 75  POTASH PER HA OF GCA (Kg per ha)                516 non-null    float64
 76  TOTAL CONSUMPTION (tons)                        516 non-null    float64
 77  TOTAL PER HA OF NCA (Kg per ha)                 516 non-null    float64
 78  TOTAL PER HA OF GCA (Kg per ha)                 516 non-null    float64
dtypes: float64(74), int64(3), object(2)
memory usage: 333.4+ KB

Handling Missing values¶

In [4]:
import pandas as pd
import numpy as np

# Step 1: Replace missing values in numeric columns with the mean
numeric_columns = merged_df.select_dtypes(include=[np.number]).columns.tolist()
merged_df[numeric_columns] = merged_df[numeric_columns].apply(lambda x: x.fillna(x.mean()))

# Step 2: Drop rows where string datatype values are missing
string_columns = merged_df.select_dtypes(include=['object']).columns.tolist()
merged_df.dropna(subset=string_columns, inplace=True)

# Step 3: reset index if rows were dropped
merged_df.reset_index(drop=True, inplace=True)
In [5]:
merged_df
Out[5]:
Dist Code Year State Code State Name Dist Name JANUARY RAINFALL (Millimeters) FEBRUARY RAINFALL (Millimeters) MARCH RAINFALL (Millimeters) APRIL RAINFALL (Millimeters) MAY RAINFALL (Millimeters) ... PHOSPHATE SHARE IN NPK (Percent) PHOSPHATE PER HA OF NCA (Kg per ha) PHOSPHATE PER HA OF GCA (Kg per ha) POTASH CONSUMPTION (tons) POTASH SHARE IN NPK (Percent) POTASH PER HA OF NCA (Kg per ha) POTASH PER HA OF GCA (Kg per ha) TOTAL CONSUMPTION (tons) TOTAL PER HA OF NCA (Kg per ha) TOTAL PER HA OF GCA (Kg per ha)
0 65 2000 5 Karnataka Kolar 0.000000 36.000000 0.00000 17.000000 70.00000 ... 22.4 37.93 34.68 8386.0 13.6 23.10 21.12 61509.0 169.45 154.93
1 65 2001 5 Karnataka Kolar 1.000000 0.000000 5.00000 75.000000 24.00000 ... 23.6 34.15 32.18 7148.0 14.1 20.39 19.21 50822.0 144.97 136.61
2 65 2002 5 Karnataka Kolar 0.000000 0.000000 10.00000 26.000000 113.00000 ... 24.0 41.46 40.49 8389.0 18.3 31.70 30.96 45774.0 172.97 168.95
3 65 2003 5 Karnataka Kolar -1.000000 -1.000000 -1.00000 10.000000 3.00000 ... 24.6 31.70 31.04 9263.0 20.3 26.10 25.55 45669.0 128.70 125.99
4 65 2004 5 Karnataka Kolar 2.000000 1.000000 14.00000 33.000000 172.00000 ... 26.4 38.86 37.56 11659.0 21.9 32.27 31.18 53195.0 147.22 142.28
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
535 2025 2013 5 Karnataka Yadagiri 0.000000 5.900000 0.00000 10.600000 44.00000 ... 30.4 55.26 44.83 7025.0 11.4 20.69 16.79 61759.0 181.92 147.59
536 2025 2014 5 Karnataka Yadagiri 1.790504 3.360078 9.42093 41.324419 89.79845 ... 33.4 85.98 68.93 10081.0 13.1 33.65 26.98 77071.0 257.30 206.28
537 2025 2015 5 Karnataka Yadagiri 1.790504 3.360078 9.42093 41.324419 89.79845 ... 33.2 67.63 54.71 6131.0 9.1 18.64 15.08 67113.0 204.02 165.04
538 2025 2016 5 Karnataka Yadagiri 1.790504 3.360078 9.42093 41.324419 89.79845 ... 34.9 95.16 79.18 9154.0 10.0 27.31 22.72 91389.0 272.62 226.83
539 2025 2017 5 Karnataka Yadagiri 1.790504 3.360078 9.42093 41.324419 89.79845 ... 34.0 98.85 79.76 9253.0 10.2 29.52 23.82 91067.0 290.56 234.44

540 rows × 79 columns

In [6]:
merged_df.isnull().sum()
Out[6]:
Dist Code                           0
Year                                0
State Code                          0
State Name                          0
Dist Name                           0
                                   ..
POTASH PER HA OF NCA (Kg per ha)    0
POTASH PER HA OF GCA (Kg per ha)    0
TOTAL CONSUMPTION (tons)            0
TOTAL PER HA OF NCA (Kg per ha)     0
TOTAL PER HA OF GCA (Kg per ha)     0
Length: 79, dtype: int64
In [7]:
merged_df['Year'].unique()
Out[7]:
array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017], dtype=int64)

convert the cleaned dataframe to a csv file¶

In [8]:
cleaned_data = merged_df.to_csv('cleaned_set.csv', index = True) 

Visualizations¶

In [9]:
merged_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 79 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   Dist Code                                       540 non-null    int64  
 1   Year                                            540 non-null    int64  
 2   State Code                                      540 non-null    int64  
 3   State Name                                      540 non-null    object 
 4   Dist Name                                       540 non-null    object 
 5   JANUARY RAINFALL (Millimeters)                  540 non-null    float64
 6   FEBRUARY RAINFALL (Millimeters)                 540 non-null    float64
 7   MARCH RAINFALL (Millimeters)                    540 non-null    float64
 8   APRIL RAINFALL (Millimeters)                    540 non-null    float64
 9   MAY RAINFALL (Millimeters)                      540 non-null    float64
 10  JUNE RAINFALL (Millimeters)                     540 non-null    float64
 11  JULY RAINFALL (Millimeters)                     540 non-null    float64
 12  AUGUST RAINFALL (Millimeters)                   540 non-null    float64
 13  SEPTEMBER RAINFALL (Millimeters)                540 non-null    float64
 14  OCTOBER RAINFALL (Millimeters)                  540 non-null    float64
 15  NOVEMBER RAINFALL (Millimeters)                 540 non-null    float64
 16  DECEMBER RAINFALL (Millimeters)                 540 non-null    float64
 17  ANNUAL RAINFALL (Millimeters)                   540 non-null    float64
 18  JANUARY WATER DEFICIT (Millimeters)             540 non-null    float64
 19  FEBRUARY WATER DEFICIT (Millimeters)            540 non-null    float64
 20  MARCH WATER DEFICIT (Millimeters)               540 non-null    float64
 21  APRIL WATER DEFICIT (Millimeters)               540 non-null    float64
 22  MAY WATER DEFICIT (Millimeters)                 540 non-null    float64
 23  JUNE WATER DEFICIT (Millimeters)                540 non-null    float64
 24  JULY WATER DEFICIT (Millimeters)                540 non-null    float64
 25  AUGUST WATER DEFICIT (Millimeters)              540 non-null    float64
 26  OCTOBER WATER DEFICIT (Millimeters)             540 non-null    float64
 27  NOVEMBER WATER DEFICIT (Millimeters)            540 non-null    float64
 28  DECEMBER WATER DEFICIT (Millimeters)            540 non-null    float64
 29  JANUARY MAXIMUM (Centigrate)                    540 non-null    float64
 30  FEBRUARY MAXIMUM (Centigrate)                   540 non-null    float64
 31  MARCH MAXIMUM (Centigrate)                      540 non-null    float64
 32  APRIL MAXIMUM (Centigrate)                      540 non-null    float64
 33  MAY MAXIMUM (Centigrate)                        540 non-null    float64
 34  JUNE MAXIMUM (Centigrate)                       540 non-null    float64
 35  JULY MAXIMUM (Centigrate)                       540 non-null    float64
 36  AUGUST MAXIMUM (Centigrate)                     540 non-null    float64
 37  SEPTEMBER MAXIMUM (Centigrate)                  540 non-null    float64
 38  OCTOBER MAXIMUM (Centigrate)                    540 non-null    float64
 39  NOVEMBER MAXIMUM (Centigrate)                   540 non-null    float64
 40  DECEMBER MAXIMUM (Centigrate)                   540 non-null    float64
 41  JANUARY MINIMUM (Centigrate)                    540 non-null    float64
 42  FEBRUARY MINIMUM (Centigrate)                   540 non-null    float64
 43  MARCH MINIMUM (Centigrate)                      540 non-null    float64
 44  APRIL MINIMUM (Centigrate)                      540 non-null    float64
 45  MAY MINIMUM (Centigrate)                        540 non-null    float64
 46  JUNE MINIMUM (Centigrate)                       540 non-null    float64
 47  JULY MINIMUM (Centigrate)                       540 non-null    float64
 48  AUGUST MINIMUM (Centigrate)                     540 non-null    float64
 49  SEPTEMBER MINIMUM (Centigrate)                  540 non-null    float64
 50  OCTOBER MINIMUM (Centigrate)                    540 non-null    float64
 51  NOVEMBER MINIMUM (Centigrate)                   540 non-null    float64
 52  DECEMBER MINIMUM (Centigrate)                   540 non-null    float64
 53  TOTAL AREA (1000 ha)                            540 non-null    float64
 54  FOREST AREA (1000 ha)                           540 non-null    float64
 55  BARREN AND UNCULTIVABLE LAND AREA (1000 ha)     540 non-null    float64
 56  LAND PUT TO NONAGRICULTURAL USE AREA (1000 ha)  540 non-null    float64
 57  CULTIVABLE WASTE LAND AREA (1000 ha)            540 non-null    float64
 58  PERMANENT PASTURES AREA (1000 ha)               540 non-null    float64
 59  OTHER FALLOW AREA (1000 ha)                     540 non-null    float64
 60  CURRENT FALLOW AREA (1000 ha)                   540 non-null    float64
 61  NET CROPPED AREA (1000 ha)                      540 non-null    float64
 62  GROSS CROPPED AREA (1000 ha)                    540 non-null    float64
 63  CROPING INTENSITY (Percent)                     540 non-null    float64
 64  NITROGEN CONSUMPTION (tons)                     540 non-null    float64
 65  NITROGEN SHARE IN NPK (Percent)                 540 non-null    float64
 66  NITROGEN PER HA OF NCA (Kg per ha)              540 non-null    float64
 67  NITROGEN PER HA OF GCA (Kg per ha)              540 non-null    float64
 68  PHOSPHATE CONSUMPTION (tons)                    540 non-null    float64
 69  PHOSPHATE SHARE IN NPK (Percent)                540 non-null    float64
 70  PHOSPHATE PER HA OF NCA (Kg per ha)             540 non-null    float64
 71  PHOSPHATE PER HA OF GCA (Kg per ha)             540 non-null    float64
 72  POTASH CONSUMPTION (tons)                       540 non-null    float64
 73  POTASH SHARE IN NPK (Percent)                   540 non-null    float64
 74  POTASH PER HA OF NCA (Kg per ha)                540 non-null    float64
 75  POTASH PER HA OF GCA (Kg per ha)                540 non-null    float64
 76  TOTAL CONSUMPTION (tons)                        540 non-null    float64
 77  TOTAL PER HA OF NCA (Kg per ha)                 540 non-null    float64
 78  TOTAL PER HA OF GCA (Kg per ha)                 540 non-null    float64
dtypes: float64(74), int64(3), object(2)
memory usage: 333.4+ KB
In [36]:
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(14, 8))
sns.boxplot(data=merged_df, x='Dist Name', y='ANNUAL RAINFALL (Millimeters)')
plt.title('Annual Rainfall Distribution by Dist')
plt.xticks(rotation=90)
plt.show()
In [11]:
'''
1.more bigger the box more dispersed data which means if a box is big the rainfall is varying
and not average throughout eg shimoge rainfall varies from 1300mm to 3100mm
The longer the box, the more dispersed the data. The smaller, the less dispersed the data.

2.middle line is median 
The median is the average value from a set of data and is shown by the line that divides the box into two parts.
Half the scores are greater than or equal to this value, and half are less.
'''
Out[11]:
'\n1.more bigger the box more dispersed data which means if a box is big the rainfall is varying\nand not average throughout eg shimoge rainfall varies from 1300mm to 3100mm\nThe longer the box, the more dispersed the data. The smaller, the less dispersed the data.\n\n2.middle line is median \nThe median is the average value from a set of data and is shown by the line that divides the box into two parts.\nHalf the scores are greater than or equal to this value, and half are less.\n'
In [12]:
months = ['JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 'JUNE', 
          'JULY', 'AUGUST', 'SEPTEMBER', 'OCTOBER', 'NOVEMBER', 'DECEMBER']
rainfall_cols = [f'{month} RAINFALL (Millimeters)' for month in months]
max_temp_cols = [f'{month} MAXIMUM (Centigrate)' for month in months]
min_temp_cols = [f'{month} MINIMUM (Centigrate)' for month in months]

plt.figure(figsize=(16, 10))
for i, month in enumerate(months):
    plt.subplot(3, 4, i + 1)
    sns.lineplot(data=merged_df, x='Year', y=rainfall_cols[i], label='Rainfall')
    sns.lineplot(data=merged_df, x='Year', y=max_temp_cols[i], label='Max Temp', color='r')
    sns.lineplot(data=merged_df, x='Year', y=min_temp_cols[i], label='Min Temp', color='g')
    plt.title(month)
    plt.xlabel('')
    plt.ylabel('')

plt.tight_layout()
plt.show()
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
In [13]:
'''
y-rainfall,temp
x-year 
1. Rainfall season from june to september: ideally we need to have high rainfall in this range but with the graph we can tell
that the rainfall is varying drastically when the farmer expects consistent rain due to which soil crops like moong,rice,corn
are affected in areas like udupi ,chikmagunglur 
2.Post-Monsoon Period (October to December): there is increse in instances of unexpected rain in the post monsoon period 
3.Summer Season (March to May) : Rainfall is increasing in summer season that causes nitrogen loss 
4. winter season January to february : temperature is increasing in winters which causes more water consumption for plants 
'''
Out[13]:
'\ny-rainfall,temp\nx-year \n1. Rainfall season from june to september: ideally we need to have high rainfall in this range but with the graph we can tell\nthat the rainfall is varying drastically when the farmer expects consistent rain due to which soil crops like moong,rice,corn\nare affected in areas like udupi ,chikmagunglur \n2.Post-Monsoon Period (October to December): there is increse in instances of unexpected rain in the post monsoon period \n3.Summer Season (March to May) : Rainfall is increasing in summer season that causes nitrogen loss \n4. winter season January to february : temperature is increasing in winters which causes more water consumption for plants \n'
In [34]:
land_use_cols = ['FOREST AREA (1000 ha)', 'BARREN AND UNCULTIVABLE LAND AREA (1000 ha)', 
                 'LAND PUT TO NONAGRICULTURAL USE AREA (1000 ha)', 'CULTIVABLE WASTE LAND AREA (1000 ha)', 
                 'PERMANENT PASTURES AREA (1000 ha)', 'OTHER FALLOW AREA (1000 ha)', 
                 'CURRENT FALLOW AREA (1000 ha)', 'NET CROPPED AREA (1000 ha)']

land_use_df = merged_df.groupby('Dist Name')[land_use_cols].sum()

land_use_df.plot(kind='bar', stacked=True, figsize=(14, 8))
plt.title('Land Use Distribution by District')
plt.xlabel('District')
plt.ylabel('Area (1000 ha)')
plt.xticks(rotation=90)
plt.legend(loc='upper right')
plt.show()
In [17]:
land_use_col = 'LAND PUT TO NONAGRICULTURAL USE AREA (1000 ha)'

# Group by 'Dist Name' and 'Year'
land_use_df = merged_df.groupby(['Dist Name', 'Year'])[land_use_col].sum().reset_index()

# Initialize the plot
plt.figure(figsize=(16, 10))

# Create a bar plot
sns.barplot(data=land_use_df, x='Dist Name', y=land_use_col, hue='Year', palette='viridis')

# Set the title and labels
plt.title('Land Put to Nonagricultural Use Area (1000 ha) Year-wise for Every District')
plt.xlabel('District')
plt.ylabel('Area (1000 ha)')
plt.xticks(rotation=90)
plt.legend(title='Year', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()
In [18]:
'''
metro cities like bangalore are using agricultural land for urbanization
'''
Out[18]:
'\nmetro cities like bangalore are using agricultural land for urbanization\n'
In [35]:
fertilizer_cols = ['NITROGEN PER HA OF NCA (Kg per ha)', 'PHOSPHATE PER HA OF NCA (Kg per ha)', 
                   'POTASH PER HA OF NCA (Kg per ha)', 'TOTAL PER HA OF NCA (Kg per ha)']

fertilizer_df = merged_df.groupby('Dist Name')[fertilizer_cols].mean()

plt.figure(figsize=(14, 8))
sns.heatmap(fertilizer_df, annot=True, cmap='YlGnBu')
plt.title('Average Fertilizer Consumption per Hectare by District')
plt.xlabel('Fertilizer Type')
plt.ylabel('District')
plt.show()
In [ ]:
'''
We can conclude here that Banglore Urban has the highest fertilizer consumption and due to pollution and changing climate.
And Bijapur consumes the lowest. 
'''
In [23]:
correlation_cols = ['ANNUAL RAINFALL (Millimeters)', 'TOTAL AREA (1000 ha)', 
                    'FOREST AREA (1000 ha)', 'NET CROPPED AREA (1000 ha)', 
                    'NITROGEN PER HA OF NCA (Kg per ha)', 'PHOSPHATE PER HA OF NCA (Kg per ha)', 
                    'POTASH PER HA OF NCA (Kg per ha)', 'TOTAL PER HA OF NCA (Kg per ha)']

correlation_matrix = merged_df[correlation_cols].corr()

plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()
In [24]:
'''
Shows correlation between two variables. 
Positive: Value closer to 1 indicates positive correlation. one variable Increases then other also increases.
Negative: Value closer to -1 means if i variable increases then other decreases.
Diagonal: always 1 as they show correlation of each variable with itself.


Key Observations:

Annual Rainfall (Millimeters):
Positively correlated with Forest Area (0.42). means forest area recived more rainfall 
Negatively correlated with Net Cropped Area (-0.42). less rainfall in cropped area 


Net Cropped Area (1000 ha):
Strongly positively correlated with Total Area (0.8).
Negatively correlated with Annual Rainfall (-0.42). annually rainfall is decreasing 

understanding the inverse relationship between rainfall and net cropped area can help in planning 
agricultural activities and selecting suitable crops that can tolerate varying rainfall conditions.

Total per ha of NCA (Kg per ha):

Very strongly positively correlated with Nitrogen per ha of NCA (0.98) and Phosphate per ha of NCA (0.96).
Strongly positively correlated with Potash per ha of NCA (0.81).

The strong correlations among different fertilizers suggest that interventions aimed at optimizing
fertilizer use should consider all types simultaneously rather than in isolation.
'''
Out[24]:
'\nKey Observations:\n\nAnnual Rainfall (Millimeters):\nPositively correlated with Forest Area (0.42). means forest area recived more rainfall \nNegatively correlated with Net Cropped Area (-0.42). less rainfall in cropped area \n\n\nNet Cropped Area (1000 ha):\nStrongly positively correlated with Total Area (0.8).\nNegatively correlated with Annual Rainfall (-0.42). annually rainfall is decreasing \n\nunderstanding the inverse relationship between rainfall and net cropped area can help in planning \nagricultural activities and selecting suitable crops that can tolerate varying rainfall conditions.\n\nTotal per ha of NCA (Kg per ha):\n\nVery strongly positively correlated with Nitrogen per ha of NCA (0.98) and Phosphate per ha of NCA (0.96).\nStrongly positively correlated with Potash per ha of NCA (0.81).\n\nThe strong correlations among different fertilizers suggest that interventions aimed at optimizing\nfertilizer use should consider all types simultaneously rather than in isolation.\n'
In [19]:
import pandas as pd
import plotly.express as px

fig = px.line(merged_df, 
              x='Year', 
              y='CULTIVABLE WASTE LAND AREA (1000 ha)', 
              color='Dist Name',
              title='Cultivable Waste Land Area (1000 ha) Year-wise for Every District',
              labels={'CULTIVABLE WASTE LAND AREA (1000 ha)': 'Cultivable Waste Land Area (1000 ha)', 'Dist Name': 'District'})

# Show the plot
fig.show()
C:\Users\DELL\anaconda3\lib\site-packages\plotly\express\_core.py:1979: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  sf: grouped.get_group(s if len(s) > 1 else s[0])
In [ ]:
'''
1. Districts like ramnagaram are resusing cultivable waste land area , as there is a decrease in cultivable waste land area 
since 2004 to 2017
'''
In [25]:
import pandas as pd
import plotly.express as px


# Prompt user to enter the district name
district_name = input("Enter the district name: ")

# Filter the dataframe for the specified district
district_df = merged_df[merged_df['Dist Name'] == district_name]

# Check if the district name exists in the dataframe
if district_df.empty:
    print(f"No data available for district: {district_name}")
else:
    # Create an line plot for fertilizers used every year in the specified district
    fig = px.line(district_df, 
                  x='Year', 
                  y=['NITROGEN CONSUMPTION (tons)', 'PHOSPHATE CONSUMPTION (tons)', 'POTASH CONSUMPTION (tons)'],
                  title=f'Fertilizers Used Every Year in {district_name}',
                  labels={'value': 'Consumption (tons)', 'variable': 'Fertilizer Type'},
                  markers=True)

    fig.update_layout(xaxis_title='Year',
                      yaxis_title='Fertilizer Consumption (tons)',
                      legend_title='Fertilizer Type')

    # Show the plot
    fig.show()
Enter the district name: Udupi
In [37]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt


# Selecting relevant columns 
features = ['TOTAL AREA (1000 ha)', 'FOREST AREA (1000 ha)', 'NET CROPPED AREA (1000 ha)', 
            'NITROGEN PER HA OF NCA (Kg per ha)', 'PHOSPHATE PER HA OF NCA (Kg per ha)', 
            'POTASH PER HA OF NCA (Kg per ha)', 'TOTAL PER HA OF NCA (Kg per ha)']

target = 'ANNUAL RAINFALL (Millimeters)'

# Preprocess the data
X = merged_df[features]
y = merged_df[target]

# Splitting data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# Evaluate the model
train_mae = mean_absolute_error(y_train, y_pred_train)
train_r2 = r2_score(y_train, y_pred_train)

test_mae = mean_absolute_error(y_test, y_pred_test)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
test_r2 = r2_score(y_test, y_pred_test)

print(f'Training MAE: {train_mae}')
print(f'Training R²: {train_r2}')
print(f'Test MAE: {test_mae}')

print(f'Test R²: {test_r2}')

# Plotting actual vs predicted rainfall
plt.figure(figsize=(14, 6))
plt.scatter(y_test, y_pred_test, alpha=0.7)
plt.xlabel('Actual Annual Rainfall (Millimeters)')
plt.ylabel('Predicted Annual Rainfall (Millimeters)')
plt.title('Actual vs Predicted Annual Rainfall')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red')  # Diagonal line
plt.show()
Training MAE: 536.6353482568031
Training R²: 0.36440114449655314
Test MAE: 502.21089775031083
Test R²: 0.4075420470392863
In [ ]:
'''
Training MAE: 536.64
Test MAE: 502.21
Observation: Both the training and test MAE values are relatively close, which indicates that 
the model is performing consistently on both the training and test datasets. MAE values around 500 
indicate that, on average, the model's predictions are off by about 500 millimeters from the actual rainfall values.

Training and Test R² (Coefficient of Determination):

Training R²: 0.36
Test R²: 0.41
These R² values suggest that the model has moderate predictive power but there is still significant room for improvement.
Higher R² values would indicate better model performance.

Each point on the scatter plot represents an actual vs. predicted pair from the test set.
The closer the points are to the diagonal line (where y = x), the better the model's predictions. Points directly on this line indicate perfect predictions.


'''